
import requests
from lxml import etree
import pymysql
import sys
import re

def getInfo(url):
    """Scrape one listing page and store its rows in ``price_shuidao``.

    Fetches ``url``, selects every ``li`` under the ``imgList`` element and,
    for each listing whose price passes the range filter below, inserts one
    row (value, des, breed, name) through the module-level ``cursor`` and
    commits it on the module-level ``db`` connection.

    Args:
        url: Address of the listing page to download.

    Returns:
        None. Database errors are printed and rolled back per row; they do
        not stop the remaining listings from being processed.
    """
    req = requests.get(url)
    tree = etree.HTML(req.text)
    listings = tree.xpath('//*[@id="imgList"]/ul/ul/li')

    # Placeholders let the driver quote the scraped text, so a quote
    # character in a listing can neither break nor alter the statement.
    insert_sql = (
        "INSERT INTO price_shuidao(value,des,breed,name) "
        "VALUES (%s,%s,%s,%s)"
    )

    for item in listings:
        price_text = item.xpath('./div[1]/font')[0].text
        match = re.search(r'\d+(?:\.\d+)?', price_text or "")
        if match is None:
            # No number in the price cell: nothing usable to store.
            continue

        value_num = float(match.group(0))
        if value_num < 20:
            value = str(value_num)
        elif 500 < value_num < 10000:
            # Convert a per-ton price to a per-jin price (2000 jin per ton).
            value = str(value_num / 2000)
        else:
            # Price outside both accepted ranges: skip only this listing
            # instead of abandoning the rest of the page.
            continue

        des = item.xpath('./a[2]/span')[0].text
        breed = item.xpath('./span')[0].text
        name = item.xpath('./div[3]/div')[0].text.strip()

        try:
            # A missing breed is stored as an empty string.
            cursor.execute(insert_sql, (value, des, breed or "", name))
            db.commit()
        except Exception as exc:
            # Best effort: report the failed row, undo it, and carry on.
            db.rollback()
            print(type(exc), ":", exc)



# Connect with keyword arguments: PyMySQL 1.0+ no longer accepts the
# host/user/password/database values positionally.
db = pymysql.connect(
    host="localhost",
    user="admin",
    password="admin",
    database="admin",
    charset="utf8",
)
try:
    # Create a cursor object for executing statements.
    cursor = db.cursor()

    # Recreate the price_shuidao table from scratch on every run.
    # NOTE: the original author flagged the schema as provisional
    # (all columns are TEXT).
    cursor.execute("DROP TABLE IF EXISTS price_shuidao;")
    cursor.execute("CREATE TABLE price_shuidao(value TEXT,des TEXT,breed TEXT,name TEXT)")
    db.commit()

    # The content to scrape spans 6 pages.
    totalIndex = 6
    for currentIndex in range(1, totalIndex + 1):
        url = "http://www.cnhnb.com/p/shuidao-301008-0-0-0-"+str(currentIndex)+"?page.orderBy=updateTime&notNegotiable=true"
        print(url)
        getInfo(url)
finally:
    # Close the database connection even if scraping fails part-way.
    db.close()

